/* Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <errno.h>
#include <ctype.h>

#include "udm_config.h"
#include "udmsearch.h"
#include "udm_db.h"
#include "udm_http.h"
#include "udm_parsehtml.h"
#include "udm_host.h"
#include "udm_contentencoding.h"

/******************* Template functions ********************/


/*
 * Emit a string to the optional stream and append it to the optional buffer.
 *
 *   stream   when non-NULL, src is written to it in full with fputs().
 *   dst      when non-NULL, a NUL-terminated string that src is appended to.
 *   dst_len  number of bytes available at dst, terminator included.
 *   src      text to emit; NULL means "nothing to do".
 *
 * Returns the number of characters really appended to dst.  The value is 0
 * when dst is NULL, when dst has no room left, or when no terminator is found
 * inside the first dst_len bytes of dst.  Callers add the result to their
 * write offset, so it must never exceed what was stored: reporting
 * strlen(src) after a truncated copy would push that offset past the buffer.
 */
static size_t out_string(FILE * stream, char * dst, size_t dst_len, const char * src){
	char * end;
	size_t room;
	size_t n;

	if(!src)return(0);
	if(stream)fputs(src,stream);
	if(!dst || !dst_len)return(0);

	/* Locate the current end of dst without ever reading past dst_len */
	end=(char*)memchr(dst,'\0',dst_len);
	if(!end)return(0);

	/* One byte is always reserved for the terminator */
	room=dst_len-(size_t)(end-dst)-1;
	n=strlen(src);
	if(n>room)n=room;
	memcpy(end,src,n);
	end[n]='\0';
	return(n);
}

/*
 * Return a newly allocated copy of src in which every '\2' byte is replaced
 * by the string beg and every '\3' byte by the string end.  All other bytes
 * are copied unchanged.
 *
 * A NULL beg or end is treated as an empty string.
 * Returns NULL when src is NULL or when memory cannot be allocated.
 * The caller owns the result and releases it with free().
 */
static char * HiLightDup(const char * src,const char * beg, const char * end){
	size_t blen;
	size_t elen;
	size_t len=1;	/* room for the terminator */
	const char * s;
	char * res, * d;

	if(!src)return(NULL);
	if(!beg)beg="";
	if(!end)end="";
	blen=strlen(beg);
	elen=strlen(end);

	/* First pass: size of the expanded string */
	for(s=src;*s;s++){
		if(*s=='\2')len+=blen;
		else if(*s=='\3')len+=elen;
		else len++;
	}

	res=(char*)malloc(len);
	if(!res)return(NULL);

	/* Second pass: copy, expanding the two marker bytes */
	for(s=src,d=res;*s;s++){
		if(*s=='\2'){
			memcpy(d,beg,blen);
			d+=blen;
		}else if(*s=='\3'){
			memcpy(d,end,elen);
			d+=elen;
		}else{
			*d++=*s;
		}
	}
	*d='\0';
	return(res);
}

/*
 * Return a newly allocated copy of s with every '\2' and '\3' marker byte
 * dropped.  All other bytes are copied unchanged.
 *
 * Returns NULL when s is NULL or when memory cannot be allocated.
 * The caller owns the result and releases it with free().
 */
static char* RemoveHiLightDup(const char *s){
	char	*res;
	char	*d;

	if(!s)return(NULL);

	/* The result can only be shorter than or as long as the input */
	res=(char*)malloc(strlen(s)+1);
	if(!res)return(NULL);

	for(d=res;*s;s++){
		if(*s!='\2' && *s!='\3')*d++=*s;
	}
	*d='\0';
	return res;
}

/*
 * Append src to the outputs of PrintTextTemplate without ever leaving the
 * destination buffer.
 *
 * dst/dst_len describe the whole buffer and dlen the number of bytes already
 * used in it.  When no room is left, only the stream (if any) receives src.
 * Returns the number of characters by which dlen must grow.  The result of
 * out_string() is clamped to the space that was really available, so the
 * offset stays valid even if out_string() reports the untruncated length.
 */
static size_t OutBounded(FILE * stream, char * dst, size_t dst_len, size_t dlen, const char * src){
	size_t room=(dst && dlen<dst_len)?(dst_len-dlen):0;
	size_t n=out_string(stream, room?dst+dlen:NULL, room, src);

	if(room && n>=room)n=room-1;
	return(n);
}

/*
 * Choose the position at which a value is cut when it has to be shortened to
 * maxlen characters.  Requires 1 <= maxlen < strlen(value).
 *
 * The default cut is value+maxlen.  When that position falls inside (or on
 * the closing ';' of) something shaped like "&name;" or "&#digits;", the cut
 * is moved just past the ';' so the reference is not split in two.
 */
static char * EntitySafeCut(char * value, size_t maxlen){
	char * p=value+maxlen;
	char * S=p;
	char * e=p;

	if(isdigit((unsigned char)*e)){
		/* Possibly inside "&#123;": walk back to the '#', then to the '&' */
		while(isdigit((unsigned char)*S) && S>(value+1))S--;
		S--;
		if(*S=='&' && S[1]=='#'){
			while(isdigit((unsigned char)*e))e++;
			if(*e==';')p=e+1;
		}
	}else if(isalpha((unsigned char)*e)){
		/* Possibly inside "&name;" */
		while(isalpha((unsigned char)*S) && S>value)S--;
		if(*S=='&'){
			while(isalpha((unsigned char)*e))e++;
			if(*e==';')p=e+1;
		}
	}else if(*e==';' && S>value){
		/* The cut sits exactly on the ';' that may close a reference */
		S--;
		if(isdigit((unsigned char)*S)){
			while(isdigit((unsigned char)*S) && S>(value+1))S--;
			if(S>value){	/* never step in front of the string */
				S--;
				if(*S=='&' && S[1]=='#')p=e+1;
			}
		}else if(isalpha((unsigned char)*S)){
			while(isalpha((unsigned char)*S) && S>value)S--;
			if(*S=='&')p=e+1;
		}
	}
	return(p);
}

/*
 * Expand one piece of template text.
 *
 * Ordinary characters are copied as they are.  Variable references are
 * replaced by the value found in vars (empty when the variable is missing):
 *
 *   $(name)    value with the '\2' / '\3' highlight markers removed
 *   $&(name)   value passed through UdmHtmlSpecialChars(), markers replaced
 *              by the "HlBeg" / "HlEnd" variables
 *   $^(name)   value as is, markers replaced by "HlBeg" / "HlEnd"
 *   $%(name)   value passed through UdmEscapeURL()
 *
 * "name:N" with N > 0 shortens a longer value to about N characters and
 * appends "..." (see EntitySafeCut for the exact cut position).
 *
 *   stream   when non-NULL, receives the complete output.
 *   dst      when non-NULL, a NUL-terminated buffer the output is appended
 *            to; output that does not fit is dropped.
 *   dst_len  bytes available at dst, terminator included.
 *
 * Returns the number of characters added to dst.
 */
static size_t PrintTextTemplate(FILE * stream, char * dst, size_t dst_len, UDM_VARLIST * vars, const char * templ){
	const char * s;
	size_t dlen=0;
	const char * HlBeg=UdmVarListFindStr(vars,"HlBeg","");
	const char * HlEnd=UdmVarListFindStr(vars,"HlEnd","");
	
	for(s=templ; (*s) && ((stream) || (dlen < dst_len)); s++){
		int type=0;
		char *value=NULL, *eval=NULL, *eval1=NULL;
		char *shortened=NULL;	/* owned copy made when the value is cut */
		char empty[]="";
		size_t maxlen=0;
		size_t curlen=0;

		if(*s=='$'){
			const char * vbeg=NULL, * vend;
			
			if(!strncmp(s,"$(",2)){vbeg=s+2;type='(';}
			else	if(!strncmp(s,"$%(",3)){vbeg=s+3;type='%';}
			else	if(!strncmp(s,"$&(",3)){vbeg=s+3;type='&';}
			else	if(!strncmp(s,"$^(",3)){vbeg=s+3;type='^';}

			if((type)&&(vend=strchr(s,')'))){
				UDM_VAR * var;
				size_t len;
				char name[100]="";
				char * sem;
				
				/* Over-long names are truncated; keep room for the terminator */
				len=(size_t)(vend-vbeg);
				if(len>=sizeof(name))len=sizeof(name)-1;
				memcpy(name,vbeg,len);
				name[len]='\0';
				if((sem=strchr(name,':'))){
					int limit;
					*sem=0;
					limit=atoi(sem+1);
					/* Zero or negative means "do not shorten" */
					maxlen=(limit>0)?(size_t)limit:0;
				}
				
				if((var=UdmVarListFind(vars,name))){
					value=var->val;
					if(!value)value=empty;
				}else{	
					value=empty;
				}
				
				/* Resume after the closing ')' on the next iteration */
				s=vend;
			}else{
				/* No closing ')': print the '$' as an ordinary character */
				type=0;
			}
		}
		if(!value)value=empty;
		curlen=strlen(value);
		
		if((maxlen>0)&&(curlen>maxlen)){
			char * cut=EntitySafeCut(value,maxlen);
			size_t keep=(size_t)(cut-value);

			/* If the copy cannot be made the full value is printed instead */
			if((shortened=(char*)malloc(keep+4))){
				memcpy(shortened,value,keep);
				strcpy(shortened+keep,"...");
				value=shortened;
			}
		}
		switch(type){
			case '(': 
				eval=RemoveHiLightDup(value);
				dlen+=OutBounded(stream, dst, dst_len, dlen, eval);
				free(eval);
				break;
			case '&':
				eval=UdmHtmlSpecialChars(value);
				if(eval){
					eval1=HiLightDup(eval,HlBeg,HlEnd);
					dlen+=OutBounded(stream, dst, dst_len, dlen, eval1);
					free(eval1);
				}
				free(eval);
				break;
			case '^':
				eval=HiLightDup(value,HlBeg,HlEnd);
				dlen+=OutBounded(stream, dst, dst_len, dlen, eval);
				free(eval);
				break;
			case '%':
				/* Sized for three output bytes per input byte plus the terminator */
				eval=(char*)malloc(strlen(value)*3+1);
				if(eval){
					UdmEscapeURL(eval,value);
					dlen+=OutBounded(stream, dst, dst_len, dlen, eval);
					free(eval);
				}
				break;
			default:	/* One character */
				if((stream)&&(*s))fputc(*s,stream);
				/* Needs room for the character and the terminator */
				if((dst)&&(dlen+1<dst_len)){
					dst[dlen]=*s;
					dlen++;
					dst[dlen]='\0';
				}
		}
		/* Only the private copy is released, never the variable's own value */
		free(shortened);
	}
	return dlen;
}

#define T_OPT	1
#define T_INP	2

/*
 * Split template text into alternating tag and text tokens.
 *
 *   src  text to start scanning, or NULL to continue at *lt.
 *   lt   scan state; on return it points to the start of the next token, or
 *        is NULL when the token returned is the last one.
 *
 * A token that starts with '<' runs up to and including the next '>'; any
 * other token runs up to (not including) the next '<'.  When the delimiter
 * is missing, the rest of the text is one token.
 *
 * Returns a newly allocated copy of the token which the caller free()s, or
 * NULL when there is nothing left or memory cannot be allocated.
 */
static char * GetHtmlTok(const char * src,const char ** lt){
	char * res;
	size_t len;

	if((!src)&&!(src=*lt))return(NULL);
	if(*src=='<'){
		/* Find ">" and skip "<" */
		if((*lt=strchr(src,'>')))(*lt)++;
	}else{
		/* Find tag beginning */
		*lt=strchr(src,'<');
	}

	/* Last token takes the rest of the text, a middle one stops at *lt */
	len=(*lt)?(size_t)((*lt)-src):strlen(src);
	res=(char*)malloc(len+1);
	if(!res)return(NULL);
	memcpy(res,src,len);
	res[len]='\0';
	return(res);
}

/*
 * Run PrintTextTemplate() on templ, appending at offset dlen of dst, and
 * return the new offset.
 *
 * The space handed down is computed so that it can never underflow.  Once
 * the buffer is full only the stream keeps receiving output.  If the callee
 * reports at least as many characters as there was room for, the buffer is
 * treated as full from then on.
 */
static size_t AppendTemplate(FILE * stream, char * dst, size_t dst_len, size_t dlen, UDM_VARLIST * vars, const char * templ){
	size_t room=(dst && dlen<dst_len)?(dst_len-dlen):0;
	size_t n=PrintTextTemplate(stream, room?dst+dlen:NULL, room, vars, templ);

	if(!room)return(dlen);
	return((n>=room)?dst_len:dlen+n);
}

/*
 * Render one template section.
 *
 * The section is split into tokens with GetHtmlTok() and each token is
 * handled by kind:
 *
 *   <OPTION ...> / <INPUT ...>
 *       The tag is rebuilt attribute by attribute.  A "selected" attribute
 *       names a variable (surrounding "$()" characters are trimmed).  When
 *       UdmVarListFindWithValue() finds that variable with the value given
 *       in the tag's "value" attribute, the keyword SELECTED is added.  The
 *       rebuilt tag then goes through PrintTextTemplate().
 *   <!INCLUDE content="url">
 *       Only when Agent is non-NULL: the content attribute is expanded, the
 *       resulting URL is fetched with UdmGetURL() and the document body is
 *       written to stream.  Nothing is appended to dst for this tag.
 *   anything else
 *       Passed to PrintTextTemplate().
 *
 *   stream   when non-NULL, receives the output.
 *   dst      when non-NULL, NUL-terminated buffer of dst_len bytes that the
 *            output is appended to; output that does not fit is dropped.
 */
static void PrintHtmlTemplate(UDM_AGENT * Agent, FILE * stream, char * dst, size_t dst_len, UDM_VARLIST * vars, const char * template){
	const char *lt;
	char  *tok;
	size_t dlen=0;
	int i;
	
	tok=GetHtmlTok(template,&lt);
	while(tok){
		int type=0;
		if(!(UDM_STRNCASECMP(tok,"<OPTION"))){
			type=T_OPT;	
		}else
		if(!(UDM_STRNCASECMP(tok,"<INPUT"))){
			type=T_INP;
		}else
		if(!UDM_STRNCASECMP(tok,"<!INCLUDE")){
			if(Agent){
				UDM_HTMLTOK ltag, *tag = &ltag;
				const char *last;
				char *tag_content = NULL;
				UDM_DOCUMENT * Inc=UdmDocInit(NULL);
				size_t max_doc_size = (size_t)UdmVarListFindInt(vars,"MaxDocSize",UDM_MAXDOCSIZE);

				if(!Inc->Buf.buf)Inc->Buf.buf=malloc(max_doc_size);
				Inc->Buf.maxsize=max_doc_size;

				UdmHTMLTOKInit(tag);
				UdmHTMLToken(tok, &last, tag);
				for(i = 0; i < ltag.ntoks; i++) {
					if (ISTAG(i, "content")) {
						tag_content = strndup(ltag.toks[i].val, ltag.toks[i].vlen);
						break;
					}
				}
				/* Without a download buffer there is nothing to fetch into */
				if(tag_content && Inc->Buf.buf){
					char  vurl[UDM_URLSIZE*4]="";
					const char *ce;
					
					PrintTextTemplate(NULL, vurl, sizeof(vurl), vars, tag_content);
					UdmURLParse(&Inc->CurURL,vurl);
					UdmVarListAddStr(&Inc->RequestHeaders, "Host", Inc->CurURL.hostname);
					Inc->connp.hostname = strdup(Inc->CurURL.hostname);
					Inc->connp.port = Inc->CurURL.port ? Inc->CurURL.port : Inc->CurURL.default_port;
					
					/*
					 * The lookup result is ignored here.
					 * NOTE(review): presumably UdmGetURL() fails on its own
					 * when the host did not resolve - confirm.
					 */
					if(UdmHostLookup(&Agent->Conf->Hosts, &Inc->connp)){
					}
					
					if(UdmGetURL(Agent,Inc)==UDM_OK){
						UdmParseHTTPResponse(Agent,Inc);
						if(Inc->Buf.content){
							ce=UdmVarListFindStr(&Inc->Sections,"Content-Encoding","");
#ifdef HAVE_ZLIB
							if(!strcasecmp(ce,"gzip") || !strcasecmp(ce,"x-gzip")){
								UdmUnGzip(Inc);
							}else
							if(!strcasecmp(ce,"deflate")){
								UdmInflate(Inc);
							}else
							if(!strcasecmp(ce,"compress") || !strcasecmp(ce,"x-compress")){
								UdmUncompress(Inc);
							}
#endif
							if(stream){
								fprintf(stream,"%s",Inc->Buf.content);
							}else{
								/* FIXME: add printing to string */
							}
						}
					}
				}
				free(tag_content);
				UdmDocFree(Inc);
			}
		}else{
			dlen = AppendTemplate(stream, dst, dst_len, dlen, vars, tok);
		}
		if(type){
			char * opt;
			UDM_HTMLTOK ltag, *tag = &ltag;
			const char *last;
			UDM_VAR * var=NULL;
			char * vname = NULL, *value = NULL;
			
			/*
			 * Every attribute is written back as name="value" followed by a
			 * space, which can be longer than its source form, so the
			 * scratch buffer is sized well above the length of the tag.
			 */
			opt=(char*)malloc(strlen(tok)*3+200);
			if(opt){
				UdmHTMLTOKInit(tag);
				UdmHTMLToken(tok, &last, tag);
				sprintf(opt, "<");

				for (i = 0; i < ltag.ntoks; i++) {
				  if (ISTAG(i, "selected")) {
				    /* A repeated attribute replaces the earlier copy */
				    free(vname);
				    vname = strndup(ltag.toks[i].val, ltag.toks[i].vlen);
				  } else if (ISTAG(i, "value")) {
				    free(value);
				    value = strndup(ltag.toks[i].val, ltag.toks[i].vlen);
				    sprintf(UDM_STREND(opt), "VALUE=\"%s\" ", value ? value : "");
				  } else {
				    char *tname = strndup(ltag.toks[i].name, ltag.toks[i].nlen);
				    if (ltag.toks[i].vlen) {
				      char *tval = strndup(ltag.toks[i].val, ltag.toks[i].vlen);
				      sprintf(UDM_STREND(opt), "%s=\"%s\" ", tname ? tname : "", tval ? tval : "");
				      free(tval);
				    } else {
				      sprintf(UDM_STREND(opt), "%s ", tname ? tname : "");
				    }
				    free(tname);
				  }
				}

				if(vname) {
				  var = UdmVarListFindWithValue(vars, UdmTrim(vname, "$()"), value ? value:"");
				}

				sprintf(UDM_STREND(opt), "%s>", var ? "SELECTED":"");

				dlen = AppendTemplate(stream, dst, dst_len, dlen, vars, opt);
			}
			free(vname);
			free(value);
			free(opt);
		}
		
		free(tok);
		tok=GetHtmlTok(NULL,&lt);
	}
}

/*
 * Render the template section called w.
 *
 * tm holds the loaded sections; several of them may share one name.  The
 * integer variable "o" in vars selects which of the sections named w is used
 * (0 = first match, 1 = second match, ...).  When there are not that many
 * sections with this name, the first match is rendered.  When no section is
 * named w, nothing is rendered.
 *
 * If dst is non-NULL it is reset to an empty string before rendering.
 */
void UdmTemplatePrint(UDM_AGENT * Agent, FILE *stream, char *dst, size_t dst_len, UDM_VARLIST *vars, UDM_VARLIST *tm, const char *w){
	size_t	wanted=(size_t)UdmVarListFindInt(vars,"o",0);
	size_t	seen=0;
	size_t	idx;
	UDM_VAR	*fallback=NULL;
	UDM_VAR	*chosen=NULL;

	if(dst)dst[0]='\0';

	for(idx=0; idx<tm->nvars && !chosen; idx++){
		UDM_VAR *cur=&tm->Var[idx];

		if(strcasecmp(w,cur->name))continue;
		if(!fallback)fallback=cur;
		if(seen==wanted)chosen=cur;
		seen++;
	}

	/* Not enough sections with this name: use the first one */
	if(!chosen)chosen=fallback;
	if(chosen)PrintHtmlTemplate(Agent, stream, dst, dst_len, vars, chosen->val);
}

static int ParseVariable(UDM_ENV *Env,UDM_VARLIST *vars,char *str){
	char *tok,*lt;
	
	if((tok=strtok_r(str," \t\r\n",&lt))){
		char * arg=NULL;
					
		if(!strcasecmp(str,"Affix")){
			char aname[1024];
			char * args[5];
			size_t narg=0;
					
			while((tok)&&(narg<5)){
				args[narg++]=tok;
				tok=strtok_r(NULL," \t",&lt);
			}
			if(narg!=4){
				Env->errcode=1;
				sprintf(Env->errstr,"Bad Affix command");
				return(1);
			}
			if(args[3][0]=='/')strncpy(aname,args[3],sizeof(aname)-1);
			else	snprintf(aname,sizeof(aname)-1,"%s/%s",UDM_CONF_DIR,args[3]);
			if(UdmImportAffixes(Env,args[1],args[2],aname)){
				Env->errcode=1;
				snprintf(Env->errstr,sizeof(Env->errstr)-1,"Can't load affix :%s",aname);
				return(1);
			}
		}else
		if(!strcasecmp(str,"Spell")){
			char aname[1024];
			char * args[5];
			size_t narg=0;

			while((tok)&&(narg<5)){
				args[narg++]=tok;
				tok=strtok_r(NULL," \t",&lt);
			}
			if(narg!=4){
				Env->errcode=1;
				sprintf(Env->errstr,"Bad Spell command");
				return(1);
			}
			if(args[3][0]=='/')strncpy(aname,args[3],sizeof(aname)-1);
			else	snprintf(aname,sizeof(aname)-1,"%s/%s",UDM_CONF_DIR,args[3]);
			if(UdmImportDictionary(Env,args[1],args[2],aname,0,"")){
				Env->errcode=1;
				snprintf(Env->errstr,sizeof(Env->errstr)-1,"Can't load dictionary :%s",aname);
				return(1);
			}
		}else
		if(!strcasecmp(str, "IspellUsePrefixes")) {
			char sel[8];
			int val = 1;
			sscanf(str+17, "%4s", sel);
			if (!UDM_STRNCASECMP(sel, "no")) {
				val = 0;
			}
			UdmVarListAddInt(&Env->Vars, "IspellUsePrefixes", val);
		}else
		if(!strcasecmp(str,"StopwordFile")){
			char aname[1024];
			
			arg=strtok_r(NULL," \t\r\n",&lt);
			if(arg){
				if(arg[0]=='/')strncpy(aname,arg,sizeof(aname)-1);
				else	snprintf(aname,sizeof(aname)-1,"%s/%s",UDM_CONF_DIR,arg);
				if(UdmStopListLoad(Env,aname)){
					Env->errcode=1;
					return(1);
				}
			}else{
				Env->errcode=1;
				sprintf(Env->errstr,"Bad StopwordFile command");
				return(1);
			}
		}else
		if(!strcasecmp(str,"Synonym")){
			char aname[1024];
			arg=strtok_r(NULL," \t\r\n",&lt);
			if(arg){
				if(arg[0]=='/')strncpy(aname,arg,sizeof(aname)-1);
				else	snprintf(aname,sizeof(aname)-1,"%s/%s",UDM_CONF_DIR,arg);
				if(UdmSynonymListLoad(Env,aname)){
					Env->errcode=1;
					return(1);
				}
			}else{
				Env->errcode=1;
				sprintf(Env->errstr,"Bad Synonym command");
				return(1);
			}
		}else
		if(!strcasecmp(str,"DBAddr")){
			if((arg=strtok_r(NULL," \t\r\n",&lt))){
				if(!memcmp(arg,"searchd:",8)){
					UDM_URL	Url;
					UdmURLParse(&Url,arg);
					UdmDBListAdd(&Env->sdcl,Url.hostinfo);
				}
				UdmVarListReplaceStr(vars,tok,arg);
			}
		}else
		if((str[0]=='R'||str[0]=='r')&&(str[1]>='0')&&(str[1]<='9')){
			float r;
			int ir;
			arg=strtok_r(NULL," =\t\r\n",&lt); 
			if(arg){
				r=atoi(arg);
				srand((unsigned)time(0));
				r=r*rand()/RAND_MAX; ir=r;
				UdmVarListAddInt(vars,str,ir);
			}
		}else
		if(!strcasecmp(str,"HlBeg")){
			UdmVarListAddStr(vars,"HlBeg",lt);
		}else
		if(!strcasecmp(str,"HlEnd")){
			UdmVarListAddStr(vars,"HlEnd",lt);
		}else
		if(!strcasecmp(str,"Limit")){
			char * sc, * nm;
			arg=strtok_r(NULL," \t\r\n",&lt);
			if((sc=strchr(arg,':'))){
				*sc='\0';sc++;
				nm=(char*)malloc(strlen(arg)+8);
				sprintf(nm,"Limit-%s",arg);
				UdmVarListAddStr(vars,nm,sc);
				free(nm);
			}
		}else
		if(!strcasecmp(str,"CrossWords")){
			UdmVarListReplaceStr(vars,"CrossWords",lt);
		}else
		if(!strcasecmp(str,"Alias")){
			char * arg1;
			
			arg=strtok_r(NULL," \t\r\n",&lt);
			arg1=strtok_r(NULL," \t\r\n",&lt);
			if(arg1){
				UDM_ALIAS Alias;
				Alias.match.pattern=arg;
				Alias.replace=arg1;
				Alias.match.match_type=UDM_MATCH_BEGIN;
				Alias.match.case_sense=0;
				UdmAliasListAdd(&Env->Aliases,&Alias,Env->errstr,sizeof(Env->errstr));
			}
		}else
		if(!strcasecmp(str,"MaxWordLen")){
			arg=strtok_r(NULL," \t\r\n",&lt);
			if(arg)Env->WordParam.max_word_len=atoi(arg);
		}else
		if(!strcasecmp(str,"MinWordLen")){
			arg=strtok_r(NULL," \t\r\n",&lt);
			if(arg)Env->WordParam.min_word_len=atoi(arg);
		}else
		if(!strcasecmp(str,"LocalCharset")){
			arg = strtok_r(NULL, " \t\r\n", &lt);
			if (arg) {
				UdmVarListReplaceStr(vars, tok, arg);
				Env->lcs = UdmGetCharSet(arg);
			}
		}else
		if(!strcasecmp(str,"BrowserCharset")){
			arg = strtok_r(NULL, " \t\r\n", &lt);
			if (arg) {
				UdmVarListReplaceStr(vars, tok, arg);
				Env->bcs = UdmGetCharSet(arg);
			}
		}else{
			arg=strtok_r(NULL," \t\r\n",&lt);
			UdmVarListReplaceStr(vars,tok,arg);
		}
	}
	return 0;
}

/*
 * Load a template file.
 *
 * The file is read line by line:
 *
 *   - Lines between "<!--variables" and "-->" are configuration commands.
 *     Empty lines and lines starting with '#' are skipped; every other line
 *     is handed to ParseVariable() together with vars.
 *   - "<!--name-->" opens a section and "<!--/name-->" closes it.  The
 *     lines in between are stored, unmodified, as one entry of tmpl whose
 *     name is the section name.  Text outside sections is ignored.
 *
 * After reading, the dictionaries and synonyms in Env are sorted and the
 * database address is configured from the "DBAddr" variable of Env->Vars,
 * with a built-in default depending on the compiled-in back ends.
 *
 * Returns 0 on success.  Returns non-zero with a message in Env->errstr
 * when the file cannot be opened, a command fails, memory runs out or the
 * database address is rejected.
 */
int UdmTemplateLoad(UDM_ENV * Env,UDM_VARLIST * vars,const char * tname, UDM_VARLIST *tmpl){
	FILE		*file;
	char		str[1024];
	char		ostr[1024];
	const char	*dbaddr=NULL;
	int		variables=0;
	char		cursection[128]="";
	char		*cur=NULL;
	char		nameletter[]=
				"abcdefghijklmnopqrstuvwxyz"
				"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
				"0123456789._";
	
	if(!(file=fopen(tname,"r"))){
		snprintf(Env->errstr,sizeof(Env->errstr)-1,"Unable to open template '%s': %s",tname,strerror(errno));
		return(1);
	}
	
	while(fgets(str,sizeof(str)-1,file)){
		char	*s;
		
		str[sizeof(str)-1]='\0';
		/* ostr keeps the untouched line for the section body */
		strcpy(ostr,str);
		
		s=UdmTrim(str," \r\n");
		
		if(!strcasecmp(s,"<!--variables")){
			variables=1;
			continue;
		}
		
		if(!strcmp(s,"-->") && variables){
			variables=0;
			continue;
		}
		
		if(variables){
			int r;
			if(!*s)continue;
			if(*s=='#')continue;
			
			if((r=ParseVariable(Env,vars,s))){
				/* Do not leak the file or the pending section text */
				fclose(file);
				UDM_FREE(cur);
				return r;
			}
			continue;
		}
		
		/* strncmp stops at the terminator of lines shorter than 4 bytes */
		if(!strncmp(s,"<!--",4)){
			char *e;
			
			for(e=s+4;(*e)&&(strchr(nameletter,*e)||(*e=='/'));e++);
			
			if(!strcmp(e,"-->")){
				*e='\0';
				s+=4;
				
				if(s[0]=='/'){
					if(!strcasecmp(s+1,cursection) && cursection[0]){
						UDM_VAR *I;
						UDM_VAR *grown;

						/* Keep the old array reachable if realloc fails */
						grown=(UDM_VAR*)realloc(tmpl->Var,(tmpl->nvars+1)*sizeof(UDM_VAR));
						if(!grown){
							snprintf(Env->errstr,sizeof(Env->errstr),"Out of memory while loading template '%s'",tname);
							fclose(file);
							UDM_FREE(cur);
							return(1);
						}
						tmpl->Var=grown;
						I=&tmpl->Var[tmpl->nvars];
						/* Members not set below start out as zero */
						memset(I,0,sizeof(*I));
						I->name=strdup(cursection);
						I->val=strdup(cur?cur:"");
						tmpl->nvars++;
						cursection[0]='\0';
						UDM_FREE(cur);
						continue;
					}
				}else
				/*
				 * NOTE(review): this tests the second character, so a
				 * one-letter section name is not accepted as an opening
				 * marker - confirm whether s[0] was intended.
				 */
				if(s[1]){
					strncpy(cursection,s,sizeof(cursection));
					cursection[sizeof(cursection)-1]='\0';
					continue;
				}
			}
		}
		
		if(!cursection[0])
			continue;
		
		/* Accumulate the raw line into the body of the open section */
		if(!cur){
			cur=strdup(ostr);
		}else{
			char *grown=(char*)realloc(cur,strlen(cur)+strlen(ostr)+1);

			if(!grown){
				snprintf(Env->errstr,sizeof(Env->errstr),"Out of memory while loading template '%s'",tname);
				fclose(file);
				UDM_FREE(cur);
				return(1);
			}
			cur=grown;
			strcat(cur,ostr);
		}
	}
	fclose(file);
	UDM_FREE(cur);
	
	if(Env->Spells.nspell) {
		UdmSortDictionary(&Env->Spells);
		UdmSortAffixes(&Env->Affixes, &Env->Spells);
	}
	UdmSynonymListSort(&Env->Synonyms);
	
	
	/* The first compiled-in back end supplies the default address */
#ifdef HAVE_SQL
	if(!dbaddr)dbaddr=UdmVarListFindStr(&Env->Vars,"DBAddr","mysql://localhost/mnogosearch/");
#endif
#ifdef HAVE_FILES
	if(!dbaddr)dbaddr=UdmVarListFindStr(&Env->Vars,"DBAddr", "file:" UDM_VAR_DIR UDMSLASHSTR);
#endif
	if(!dbaddr)dbaddr=UdmVarListFindStr(&Env->Vars,"DBAddr", "searchd://localhost/");
	
	if(UDM_OK!=UdmDBSetAddr(Env->db,dbaddr,UDM_OPEN_MODE_READ)){
		/* dbaddr comes from the template, so the message must be bounded */
		snprintf(Env->errstr,sizeof(Env->errstr),"Invalid DBAddr: '%s'",dbaddr);
		Env->errcode=1;
		return 1;
	}
	
	return 0;
}
